# Load the telecom customer data; read.csv() already returns a data frame
Telecom_Data <- read.csv("Telecom Data.csv")
# Show every column's type and first values, plus the row/column counts
str(Telecom_Data)
## 'data.frame': 51047 obs. of 58 variables:
## $ CustomerID : int 3000002 3000010 3000014 3000022 3000026 3000030 3000038 3000042 3000046 3000050 ...
## $ Churn : chr "Yes" "Yes" "No" "No" ...
## $ MonthlyRevenue : num 24 17 38 82.3 17.1 ...
## $ MonthlyMinutes : int 219 10 8 1312 0 682 26 98 24 1056 ...
## $ TotalRecurringCharge : int 22 17 38 75 17 52 30 66 35 75 ...
## $ DirectorAssistedCalls : num 0.25 0 0 1.24 0 0.25 0.25 2.48 0 0 ...
## $ OverageMinutes : int 0 0 0 0 0 0 0 0 0 0 ...
## $ RoamingCalls : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PercChangeMinutes : int -157 -4 -2 157 0 148 60 24 20 43 ...
## $ PercChangeRevenues : num -19 0 0 8.1 -0.2 -3.1 4 6.8 -0.3 2.4 ...
## $ DroppedCalls : num 0.7 0.3 0 52 0 9 0 0 0 0 ...
## $ BlockedCalls : num 0.7 0 0 7.7 0 1.7 1 0.3 0 0 ...
## $ UnansweredCalls : num 6.3 2.7 0 76 0 13 2.3 4 1 0 ...
## $ CustomerCareCalls : num 0 0 0 4.3 0 0.7 0 4 0 0 ...
## $ ThreewayCalls : num 0 0 0 1.3 0 0 0 0 0 0 ...
## $ ReceivedCalls : num 97.2 0 0.4 200.3 0 ...
## $ OutboundCalls : num 0 0 0.3 370.3 0 ...
## $ InboundCalls : num 0 0 0 147 0 0 0 0 1.7 0 ...
## $ PeakCallsInOut : num 58 5 1.3 555.7 0 ...
## $ OffPeakCallsInOut : num 24 1 3.7 303.7 0 ...
## $ DroppedBlockedCalls : num 1.3 0.3 0 59.7 0 10.7 1 0.3 0 0 ...
## $ CallForwardingCalls : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CallWaitingCalls : num 0.3 0 0 22.7 0 0.7 0 0 0 0 ...
## $ MonthsInService : int 61 58 60 59 53 53 57 59 53 55 ...
## $ UniqueSubs : int 2 1 1 2 2 1 2 2 3 1 ...
## $ ActiveSubs : int 1 1 1 2 2 1 2 2 3 1 ...
## $ ServiceArea : chr "SEAPOR503" "PITHOM412" "MILMIL414" "PITHOM412" ...
## $ Handsets : int 2 2 1 9 4 3 2 3 4 9 ...
## $ HandsetModels : int 2 1 1 4 3 2 2 3 3 5 ...
## $ CurrentEquipmentDays : int 361 1504 1812 458 852 231 601 464 544 388 ...
## $ AgeHH1 : int 62 40 26 30 46 28 52 46 36 46 ...
## $ AgeHH2 : int 0 42 26 0 54 0 58 46 34 68 ...
## $ ChildrenInHH : chr "No" "Yes" "Yes" "No" ...
## $ HandsetRefurbished : chr "No" "No" "No" "No" ...
## $ HandsetWebCapable : chr "Yes" "No" "No" "Yes" ...
## $ TruckOwner : chr "No" "No" "No" "No" ...
## $ RVOwner : chr "No" "No" "No" "No" ...
## $ Homeownership : chr "Known" "Known" "Unknown" "Known" ...
## $ BuysViaMailOrder : chr "Yes" "Yes" "No" "Yes" ...
## $ RespondsToMailOffers : chr "Yes" "Yes" "No" "Yes" ...
## $ OptOutMailings : chr "No" "No" "No" "No" ...
## $ NonUSTravel : chr "No" "No" "No" "No" ...
## $ OwnsComputer : chr "Yes" "Yes" "No" "No" ...
## $ HasCreditCard : chr "Yes" "Yes" "Yes" "Yes" ...
## $ RetentionCalls : int 1 0 0 0 0 0 0 0 0 0 ...
## $ RetentionOffersAccepted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NewCellphoneUser : chr "No" "Yes" "Yes" "Yes" ...
## $ NotNewCellphoneUser : chr "No" "No" "No" "No" ...
## $ ReferralsMadeBySubscriber: int 0 0 0 0 0 0 0 0 0 0 ...
## $ IncomeGroup : int 4 5 6 6 9 1 9 6 9 5 ...
## $ OwnsMotorcycle : chr "No" "No" "No" "No" ...
## $ AdjustmentsToCreditRating: int 0 0 0 0 1 1 1 0 0 1 ...
## $ HandsetPrice : chr "30" "30" "Unknown" "10" ...
## $ MadeCallToRetentionTeam : chr "Yes" "No" "No" "No" ...
## $ CreditRating : chr "1-Highest" "4-Medium" "3-Good" "4-Medium" ...
## $ PrizmCode : chr "Suburban" "Suburban" "Town" "Other" ...
## $ Occupation : chr "Professional" "Professional" "Crafts" "Other" ...
## $ MaritalStatus : chr "No" "Yes" "Yes" "No" ...
# Number of columns and rows in the full data set
ncol(Telecom_Data)
## [1] 58
nrow(Telecom_Data)
## [1] 51047
# Boxplot for Credit Rating using ggplot
library(ggplot2)
# CreditRating is text such as "1-Highest" or "4-Medium", but a boxplot
# summarises a continuous variable, so plot the leading numeric rank instead.
# NOTE(review): assumes every value starts with "<number>-"; any other value
# becomes NA (with a coercion warning) and is left out of the plot.
ggplot(Telecom_Data, aes(y = as.integer(sub("-.*$", "", CreditRating)))) +
  geom_boxplot(colour = "orange", fill = "black") +
  labs(y = "Credit rating rank (1 = highest)") +
  ggtitle("Credit Rating using `ggplot`")

# Split the customers by churn outcome: "Yes" rows and "No" rows.
# which() skips rows where Churn is NA, matching what subset() does.
Churned <- Telecom_Data[which(Telecom_Data$Churn == "Yes"), , drop = FALSE]
Retained <- Telecom_Data[which(Telecom_Data$Churn == "No"), , drop = FALSE]
# Inspect the structure of the churned subset
str(Churned)
## 'data.frame': 14711 obs. of 58 variables:
## $ CustomerID : int 3000002 3000010 3000026 3000082 3000122 3000158 3000174 3000182 3000190 3000194 ...
## $ Churn : chr "Yes" "Yes" "Yes" "Yes" ...
## $ MonthlyRevenue : num 24 17 17.1 172.4 24.5 ...
## $ MonthlyMinutes : int 219 10 0 1978 42 196 4 684 852 782 ...
## $ TotalRecurringCharge : int 22 17 17 100 17 30 17 55 85 25 ...
## $ DirectorAssistedCalls : num 0.25 0 0 0 0 0 0 0 0 0 ...
## $ OverageMinutes : int 0 0 0 362 10 0 0 3 0 233 ...
## $ RoamingCalls : num 0 0 0 0 0 2.6 0 0 0 0 ...
## $ PercChangeMinutes : int -157 -4 0 -1007 81 54 -4 -2 -206 -258 ...
## $ PercChangeRevenues : num -19 0 -0.2 -72.5 29.9 8.6 0 0.6 0 -80 ...
## $ DroppedCalls : num 0.7 0.3 0 7.3 0 6.7 0 15.7 10.7 1 ...
## $ BlockedCalls : num 0.7 0 0 18 0 0.3 1 1.3 6.3 0.7 ...
## $ UnansweredCalls : num 6.3 2.7 0 114.3 0 ...
## $ CustomerCareCalls : num 0 0 0 0.3 0 0 0 5 1 0.3 ...
## $ ThreewayCalls : num 0 0 0 0.7 0 0.7 0 0 0 0 ...
## $ ReceivedCalls : num 97.2 0 0 515.2 0 ...
## $ OutboundCalls : num 0 0 0 22.7 0 9.7 0 19.3 54.7 1.3 ...
## $ InboundCalls : num 0 0 0 2.7 0 5.7 0 0.3 29.7 0 ...
## $ PeakCallsInOut : num 58 5 0 718 3 ...
## $ OffPeakCallsInOut : num 24 1 0 60.3 0.7 ...
## $ DroppedBlockedCalls : num 1.3 0.3 0 25.3 0 7 1 17 17 1.7 ...
## $ CallForwardingCalls : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CallWaitingCalls : num 0.3 0 0 20.3 0 0 0 0.7 6 2.7 ...
## $ MonthsInService : int 61 58 53 58 58 54 55 55 53 55 ...
## $ UniqueSubs : int 2 1 2 2 2 2 1 2 5 1 ...
## $ ActiveSubs : int 1 1 2 1 1 1 1 1 4 1 ...
## $ ServiceArea : chr "SEAPOR503" "PITHOM412" "OKCTUL918" "LOULOU502" ...
## $ Handsets : int 2 2 4 4 3 4 1 2 8 5 ...
## $ HandsetModels : int 2 1 3 3 3 3 1 2 3 4 ...
## $ CurrentEquipmentDays : int 361 1504 852 143 776 179 1661 864 151 162 ...
## $ AgeHH1 : int 62 40 46 48 36 50 64 50 50 36 ...
## $ AgeHH2 : int 0 42 54 32 0 48 64 0 50 38 ...
## $ ChildrenInHH : chr "No" "Yes" "No" "No" ...
## $ HandsetRefurbished : chr "No" "No" "No" "No" ...
## $ HandsetWebCapable : chr "Yes" "No" "No" "Yes" ...
## $ TruckOwner : chr "No" "No" "No" "Yes" ...
## $ RVOwner : chr "No" "No" "No" "Yes" ...
## $ Homeownership : chr "Known" "Known" "Known" "Known" ...
## $ BuysViaMailOrder : chr "Yes" "Yes" "Yes" "Yes" ...
## $ RespondsToMailOffers : chr "Yes" "Yes" "Yes" "Yes" ...
## $ OptOutMailings : chr "No" "No" "No" "No" ...
## $ NonUSTravel : chr "No" "No" "No" "No" ...
## $ OwnsComputer : chr "Yes" "Yes" "Yes" "No" ...
## $ HasCreditCard : chr "Yes" "Yes" "Yes" "Yes" ...
## $ RetentionCalls : int 1 0 0 1 0 0 0 0 0 0 ...
## $ RetentionOffersAccepted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NewCellphoneUser : chr "No" "Yes" "No" "No" ...
## $ NotNewCellphoneUser : chr "No" "No" "Yes" "No" ...
## $ ReferralsMadeBySubscriber: int 0 0 0 0 0 0 0 0 0 0 ...
## $ IncomeGroup : int 4 5 9 6 9 9 6 9 5 7 ...
## $ OwnsMotorcycle : chr "No" "No" "No" "No" ...
## $ AdjustmentsToCreditRating: int 0 0 1 0 0 1 0 1 0 0 ...
## $ HandsetPrice : chr "30" "30" "10" "150" ...
## $ MadeCallToRetentionTeam : chr "Yes" "No" "No" "Yes" ...
## $ CreditRating : chr "1-Highest" "4-Medium" "1-Highest" "1-Highest" ...
## $ PrizmCode : chr "Suburban" "Suburban" "Other" "Other" ...
## $ Occupation : chr "Professional" "Professional" "Professional" "Professional" ...
## $ MaritalStatus : chr "No" "Yes" "Yes" "Unknown" ...
# Inspect the structure of the retained subset (rows where Churn == "No")
str(Retained)
## 'data.frame': 36336 obs. of 58 variables:
## $ CustomerID : int 3000014 3000022 3000030 3000038 3000042 3000046 3000050 3000054 3000058 3000062 ...
## $ Churn : chr "No" "No" "No" "No" ...
## $ MonthlyRevenue : num 38 82.3 38 31.7 62.1 ...
## $ MonthlyMinutes : int 8 1312 682 26 98 24 1056 2 1972 270 ...
## $ TotalRecurringCharge : int 38 75 52 30 66 35 75 25 85 37 ...
## $ DirectorAssistedCalls : num 0 1.24 0.25 0.25 2.48 0 0 0 2.23 0.25 ...
## $ OverageMinutes : int 0 0 0 0 0 0 0 0 250 6 ...
## $ RoamingCalls : num 0 0 0 0 0 0 0 0 35.5 0 ...
## $ PercChangeMinutes : int -2 157 148 60 24 20 43 -2 -200 202 ...
## $ PercChangeRevenues : num 0 8.1 -3.1 4 6.8 ...
## $ DroppedCalls : num 0 52 9 0 0 0 0 0 9 3.3 ...
## $ BlockedCalls : num 0 7.7 1.7 1 0.3 0 0 0 0 1.7 ...
## $ UnansweredCalls : num 0 76 13 2.3 4 1 0 0.3 43.7 7.7 ...
## $ CustomerCareCalls : num 0 4.3 0.7 0 4 0 0 0 0.3 1 ...
## $ ThreewayCalls : num 0 1.3 0 0 0 0 0 0 0 0 ...
## $ ReceivedCalls : num 0.4 200.3 42.2 0 0 ...
## $ OutboundCalls : num 0.3 370.3 6.7 0 3.7 ...
## $ InboundCalls : num 0 147 0 0 0 1.7 0 0 4.7 3.7 ...
## $ PeakCallsInOut : num 1.3 555.7 33.3 1.7 7.7 ...
## $ OffPeakCallsInOut : num 3.7 303.7 53 1.7 7.3 ...
## $ DroppedBlockedCalls : num 0 59.7 10.7 1 0.3 0 0 0 9 5 ...
## $ CallForwardingCalls : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CallWaitingCalls : num 0 22.7 0.7 0 0 0 0 0 1 0.3 ...
## $ MonthsInService : int 60 59 53 57 59 53 55 53 59 55 ...
## $ UniqueSubs : int 1 2 1 2 2 3 1 2 5 2 ...
## $ ActiveSubs : int 1 2 1 2 2 3 1 2 1 2 ...
## $ ServiceArea : chr "MILMIL414" "PITHOM412" "OKCTUL918" "OKCTUL918" ...
## $ Handsets : int 1 9 3 2 3 4 9 2 10 5 ...
## $ HandsetModels : int 1 4 2 2 3 3 5 2 6 4 ...
## $ CurrentEquipmentDays : int 1812 458 231 601 464 544 388 354 199 697 ...
## $ AgeHH1 : int 26 30 28 52 46 36 46 0 30 58 ...
## $ AgeHH2 : int 26 0 0 58 46 34 68 0 22 58 ...
## $ ChildrenInHH : chr "Yes" "No" "No" "No" ...
## $ HandsetRefurbished : chr "No" "No" "No" "No" ...
## $ HandsetWebCapable : chr "No" "Yes" "Yes" "Yes" ...
## $ TruckOwner : chr "No" "No" "No" "No" ...
## $ RVOwner : chr "No" "No" "No" "No" ...
## $ Homeownership : chr "Unknown" "Known" "Known" "Known" ...
## $ BuysViaMailOrder : chr "No" "Yes" "No" "Yes" ...
## $ RespondsToMailOffers : chr "No" "Yes" "No" "Yes" ...
## $ OptOutMailings : chr "No" "No" "No" "No" ...
## $ NonUSTravel : chr "No" "No" "Yes" "Yes" ...
## $ OwnsComputer : chr "No" "No" "No" "No" ...
## $ HasCreditCard : chr "Yes" "Yes" "Yes" "Yes" ...
## $ RetentionCalls : int 0 0 0 0 0 0 0 0 0 0 ...
## $ RetentionOffersAccepted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NewCellphoneUser : chr "Yes" "Yes" "Yes" "No" ...
## $ NotNewCellphoneUser : chr "No" "No" "No" "Yes" ...
## $ ReferralsMadeBySubscriber: int 0 0 0 0 0 0 0 0 0 0 ...
## $ IncomeGroup : int 6 6 1 9 6 9 5 7 3 1 ...
## $ OwnsMotorcycle : chr "No" "No" "No" "No" ...
## $ AdjustmentsToCreditRating: int 0 0 1 1 0 0 1 0 1 1 ...
## $ HandsetPrice : chr "Unknown" "10" "30" "30" ...
## $ MadeCallToRetentionTeam : chr "No" "No" "No" "No" ...
## $ CreditRating : chr "3-Good" "4-Medium" "3-Good" "1-Highest" ...
## $ PrizmCode : chr "Town" "Other" "Other" "Other" ...
## $ Occupation : chr "Crafts" "Other" "Other" "Self" ...
## $ MaritalStatus : chr "Yes" "No" "Yes" "Yes" ...
# Bar chart: number of churned customers in each credit-rating category
library(ggplot2)
ggplot(Churned, aes(x = CreditRating)) +
  geom_bar(colour = "black", fill = "red", alpha = 0.4) +
  labs(
    title = "Credit Rating for Churned Telecom Data",
    x = "Credit Rating (x-axis)",
    y = " Count (y-axis)"
  ) +
  # Same fixed y-range as the retained-customer chart
  ylim(0, 15000) +
  theme_classic()

# Bar chart: number of retained customers in each credit-rating category
library(ggplot2)
ggplot(Retained, aes(x = CreditRating)) +
  geom_bar(colour = "black", fill = "aquamarine3", alpha = 0.6) +
  labs(
    title = "Credit Rating for Retained Telecom Data",
    x = "Credit Rating (x-axis)",
    y = " Count (y-axis)"
  ) +
  # Same fixed y-range as the churned-customer chart.
  # NOTE(review): ylim() removes any bar taller than 15000 instead of
  # clipping it - confirm no rating category exceeds that count.
  ylim(0, 15000) +
  theme_classic()

# Histogram of the AgeHH1 column (30 bins) across all customers
# NOTE(review): only ggplot2 functions are called for this plot; plotly is
# attached here but is first used by the plot_ly() boxplot further down.
library(plotly)
ggplot(Telecom_Data, aes(x = AgeHH1)) +
  geom_histogram(
    bins = 30,
    colour = "aquamarine4",
    fill = "aquamarine3",
    alpha = 0.6
  ) +
  xlab("Age of Customers") +
  ylab("Frequency") +
  ggtitle("Histogram of Customer Age") +
  theme_classic()

library(dplyr)
# Customers whose primary-user age (AgeHH1) is recorded as 0
AgeFiltered <- filter(Telecom_Data, AgeHH1 == 0)
nrow(AgeFiltered)
## [1] 13917
# Share of all customers with AgeHH1 == 0, in percent. Computed from the
# filtered row count rather than a hard-coded number so it tracks the data.
nrow(AgeFiltered) / nrow(Telecom_Data) * 100
## [1] 27.3
library(ggplot2)
# Boxplot of primary-user age (AgeHH1) across all customers, using ggplot
# NOTE(review): rows with AgeHH1 == 0 are included - confirm whether 0 is a
# placeholder for an unknown age before reading the quartiles.
ggplot(Telecom_Data, aes(y = AgeHH1)) +
  geom_boxplot(colour = "maroon", fill = "aquamarine3", alpha = 0.6) +
  ggtitle("Boxplot of Customer Age group") +
  # AgeHH1 is mapped to y, so the age label belongs on the y-axis; no
  # variable is mapped to x, so that axis gets no title.
  labs(x = NULL, y = "Age of Customers") +
  theme_classic()

library(plotly)
# Boxplots of primary-user age (AgeHH1) using plotly, one box per Churn value
# Columns are referenced with formulas (~col) so they are looked up in the
# data frame passed as the first argument.
plot_ly(Telecom_Data, y = ~AgeHH1, color = ~Churn, type = "box") %>%
  layout(
    boxmode = "group",
    xaxis = list(title = ""),
    # The y-axis shows age values, not counts
    yaxis = list(title = "Age of Customers")
  )
# Bar chart of how many customers fall into each Occupation category
library(ggplot2)
ggplot(Telecom_Data, aes(x = Occupation)) +
  # geom_bar() counts rows per category by default (stat = "count")
  geom_bar(fill = "maroon") +
  xlab("Occupation") +
  ylab(" Count") +
  ggtitle("Bar Chart of Occupations of Customers")
